Looking into rat sightings vs. waste data

# Monthly DSNY tonnage, aggregated to one row per year / month / borough.
#
# Result columns: year, month, borough (all character, so they line up with
# the join keys used later) and total_refuse, total_paper, total_mgp (numeric
# sums of the corresponding *tonscollected columns, NAs ignored).
waste <-
  read.csv("DSNY_Monthly_Tonnage_Data_20231202.csv") |>
  janitor::clean_names() |>
  # The raw `month` field is split on " / " into separate year and month keys.
  separate(month, into = c("year", "month"), sep = " / ") |>
  # `year` is text after the split; compare it as a number rather than relying
  # on string ordering. The column itself is left as character.
  filter(as.integer(year) >= 2016, as.integer(year) < 2024) |>
  # Normalise only the text key. A blanket tolower() over every column would
  # also coerce the numeric tonnage totals to character strings.
  mutate(borough = trimws(tolower(borough))) |>
  group_by(year, month, borough) |>
  summarize(
    total_refuse = sum(refusetonscollected, na.rm = TRUE),
    total_paper = sum(papertonscollected, na.rm = TRUE),
    total_mgp = sum(mgptonscollected, na.rm = TRUE),
    # Return an ungrouped table so later steps are not silently group-wise.
    .groups = "drop"
  )
# Rat-sighting complaints counted per year / month / borough.
#
# Result columns: year, month, borough (character) and ratcount (number of
# complaint rows in that cell).
sightings <-
  read_csv("NYC_Rat_Sightings.csv") |>
  janitor::clean_names() |>
  # Fixed-width split of `created_date`: chars 1-2 month, 4-5 day, 7-10 year;
  # e / f / g hold the single separator characters and are never used again.
  separate(
    created_date,
    into = c("month", "e", "day", "f", "year", "g", "time"),
    sep = c(2, 3, 5, 6, 10, 11)
  ) |>
  # Build a sortable YYYYMMDD number so the date window is a plain comparison.
  mutate(date = as.numeric(paste0(year, month, day))) |>
  filter(
    date >= 20160101,
    date <= 20231031,
    # Same bounds as the original negated comparisons, written positively.
    incident_zip > 10000,
    incident_zip <= 11697,
    !borough %in% c("Unspecified", NA)
  ) |>
  # Lower-case / trim the borough key so it matches the waste table's key.
  mutate(borough = trimws(tolower(borough))) |>
  # count() returns an ungrouped table; no column pruning is needed beforehand
  # because only the three keys survive the aggregation.
  count(year, month, borough, name = "ratcount")
# Tonnage and sighting counts side by side. The inner join keeps only the
# year / month / borough combinations present in both tables.
merged <-
  inner_join(waste, sightings, by = c("year", "month", "borough")) |>
  # Drop any grouping inherited from the inputs so later mutate() / plotting
  # calls operate on the whole table rather than per (year, month) group.
  ungroup()

Visualization

# Monthly rat-sighting counts: one coloured line per borough, one panel per
# year.
merged |>
  ggplot(
    aes(
      x = month,
      y = ratcount,
      group = interaction(year, borough),
      color = borough
    )
  ) +
  facet_wrap(~ year) +
  geom_line() +
  labs(
    x = "Month",
    y = "Rat Count",
    title = "Total Rat Count by Borough"
  )

# Monthly refuse tonnage: one coloured line (with points) per borough, one
# panel per year.
ggplot(
  merged,
  aes(
    x = month,
    # as.numeric() guards against total_refuse arriving as text (a blanket
    # tolower() upstream would stringify it); it is a no-op on numeric input.
    y = as.numeric(total_refuse),
    color = borough,
    group = interaction(year, borough)
  )
) +
  geom_point() +
  geom_line() +
  # Tonnage is a quantity, so it needs a continuous scale; on a discrete scale
  # the numeric breaks match no level and the axis loses its labels.
  scale_y_continuous(breaks = seq(10000, 90000, by = 10000)) +
  # Keep the plain column name as the axis title despite the coercion above.
  labs(y = "total_refuse") +
  facet_wrap(year ~ .) +
  theme(legend.position = "bottom")

# Box plot of monthly refuse tonnage per borough, boroughs ordered by their
# median tonnage, then rendered as an interactive plotly widget.
tr_ggplot <-
  merged |>
  # Reordering must see every row of a borough at once, not one
  # (year, month) group at a time.
  ungroup() |>
  mutate(
    # No-op on numeric input; repairs totals that arrive as text.
    total_refuse = as.numeric(total_refuse),
    borough = fct_reorder(borough, total_refuse)
  ) |>
  ggplot(aes(x = borough, y = total_refuse, fill = borough)) +
  geom_boxplot() +
  # Continuous scale: box-plot statistics are only meaningful for numeric y.
  scale_y_continuous(breaks = seq(10000, 90000, by = 10000)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

ggplotly(tr_ggplot)
# Box plot of monthly paper tonnage per borough, boroughs ordered by their
# median tonnage, then rendered as an interactive plotly widget.
tp_ggplot <-
  merged |>
  # Reordering must see every row of a borough at once, not one
  # (year, month) group at a time.
  ungroup() |>
  mutate(
    # No-op on numeric input; repairs totals that arrive as text.
    total_paper = as.numeric(total_paper),
    borough = fct_reorder(borough, total_paper)
  ) |>
  ggplot(aes(x = borough, y = total_paper, fill = borough)) +
  geom_boxplot() +
  # Continuous scale: box-plot statistics are only meaningful for numeric y.
  # NOTE(review): breaks are the same 10k-90k grid used for the refuse plot;
  # confirm they suit the range of paper tonnage.
  scale_y_continuous(breaks = seq(10000, 90000, by = 10000)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

ggplotly(tp_ggplot)
# Interactive bar chart of refuse tonnage by borough, boroughs ordered by
# their median monthly tonnage.
merged |>
  # Work on the whole table so the borough ordering is computed once, and so
  # plotly does not receive a grouped data frame.
  ungroup() |>
  mutate(
    # No-op on numeric input; repairs totals that arrive as text so the y axis
    # is quantitative rather than categorical.
    total_refuse = as.numeric(total_refuse),
    borough = fct_reorder(borough, total_refuse)
  ) |>
  plot_ly(
    x = ~borough,
    y = ~total_refuse,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  )

Still need to plot a multivariate graph.